# Imported Inequality 

#Create migrant shares from OECD data


#Upload year-by-year figures
# download annual files from https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/internationalmigration/datasets/populationoftheunitedkingdombycountryofbirthandnationality
file_list <- list.files("external/",
                        full.names = T)

ind <- grep('.txt', file_list)
file_list <- file_list[-ind]

for_shares <- tibble()

# loop over file list
popn_data <- purrr::map_dfr(file_list, function(file_name){
      #check <- function(file_name) {
      # upload
      data <- read_xls(file_name, sheet = "1.1")
      
      #Keep total UK population
      popn <- data[9, 3]
      
      #Keep total non-UK population
      migrants <- data[9, 8]
      
      year <- substr(file_name, 43, 54)
      
      for_yr <- bind_cols(year, popn, migrants)
      
      for_shares <- bind_rows(for_shares, for_yr)
      
     #return(for_shares)
    }
  )

#Check years are correct
popn_data$tax_year <- c(2004:2019, 2000:2003)

#Tidy
popn_data <- popn_data %>% 
  mutate(`Total population` = as.numeric(...2),
         Migrants = as.numeric(...3)) %>% 
  select(tax_year, `Total population`, Migrants) %>% 
  arrange(tax_year)

popn_data <- popn_data %>% 
  mutate(Share = Migrants/`Total population`)

#Save
write_csv(popn_data, "output/Migrant_share.csv")

#Add top share counts
data_ts <- read_csv("output/ts_migrant_comb_18plus.csv")
data_ts <- data_ts %>% 
  filter(ts == 0.01 & group == "all") %>% 
  mutate(migrant_P99 = migrants_count/1000, #ONS data are in '000s
         count_P99 = topshare_count/1000) %>% 
  select(tax_year, migrant_P99, count_P99)

popn_data <- left_join(popn_data, data_ts)

popn_data <- popn_data %>% 
  mutate(migrant_prop_P0P99 = (Migrants-migrant_P99)/(`Total population`-count_P99),
         growth = migrant_prop_P0P99/migrant_prop_P0P99[1])
